import re
import matplotlib.pyplot as plt

def read_ppl_values(filename, max_iteration=1500):
    """Extract validation perplexity (PPL) values from a training log.

    Scans the file for text of the form
    ``validation loss at iteration <N> | lm loss value: <x> | lm loss PPL: <y> |``
    and collects ``<N>`` and ``<y>`` in the order they appear in the file.

    Args:
        filename: Path of the training output file to read.
        max_iteration: Highest iteration number to keep (inclusive).
            Defaults to 1500; pass ``None`` to keep every match.

    Returns:
        A tuple ``(iterations, ppls)`` of two equally long lists: the
        iteration numbers as ``int`` and the matching PPL values as ``float``.
        Both lists are empty when the file contains no validation lines.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Compiled once; reused for every line of the log.
    pattern = re.compile(
        r"validation loss at iteration (\d+) \| lm loss value: [0-9.E+-]+ \| lm loss PPL: ([0-9.E+-]+) \|"
    )
    iterations = []
    ppls = []
    # The pattern only matches ASCII text, so decode leniently: a stray
    # undecodable byte elsewhere in the log should not abort the parse.
    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
        # The pattern cannot span a newline, so scanning line by line finds
        # the same matches as scanning the whole content, without loading
        # a potentially large log into memory at once.
        for line in file:
            for match in pattern.finditer(line):
                iteration = int(match.group(1))
                if max_iteration is not None and iteration > max_iteration:
                    continue  # beyond the requested iteration window
                iterations.append(iteration)
                ppls.append(float(match.group(2)))
    return iterations, ppls

# Training output files to plot, keyed by file name; each value is the
# optimizer name shown in the legend for that run.
files_and_labels = {
    "gpt-adam-6e-4.out": "Adam",
    "gpt-adamw-8e-4.out": "AdamW",
    "gpt-adabelief-5e-2.out": "Adabelief",
    "gpt-lamb-1e-2.out": "Lamb",
    "gpt-alto-1e-2.out": "ALTO"
}

# One figure with a single set of axes shared by all optimizer curves.
fig, ax = plt.subplots(figsize=(10, 6))

# Draw one PPL-vs-iteration curve per log file, in dictionary order.
for log_name in files_and_labels:
    xs, ys = read_ppl_values(log_name)
    ax.plot(xs, ys, marker='o', linestyle='-', label=files_and_labels[log_name])

# No title is drawn (the earlier plt.title call was disabled), which matches
# the "notitle" suffix of the output file name.
ax.set_xlabel('Iteration')
ax.set_ylabel('PPL')
ax.legend()
ax.grid(True)

# Save the plot as a PDF file.
fig.savefig("training_ppl_plot_notitle.pdf")
